import numpy as np
import pandas as pd
import geopandas as gpd
import altair as alt
import requests
from tqdm import tqdm
import datetime
vaccination_data_url = 'https://health-security.rnbo.gov.ua/api/vaccination/process/table?perPage=125&page={}&orderBy=date&dose=all&distributionBy=vaccine&vaccines=&period=daily&aggregateBy=regions'


def get_vaccination_data(page_num: int) -> pd.DataFrame:
    """Download one page of the daily, per-region vaccination table.

    Args:
        page_num: Page number substituted into ``vaccination_data_url``.

    Returns:
        The page's ``data`` records as a DataFrame with ``name`` renamed to
        ``region``, missing values replaced by 0 and ``date`` parsed to
        datetimes.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within 30 seconds.
    """
    # A timeout keeps one stalled request from hanging the whole download loop.
    response = requests.get(vaccination_data_url.format(page_num), timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as data.
    response.raise_for_status()

    df = pd.DataFrame(response.json()['data'])
    # These two columns are not used in the analysis; a page that does not
    # carry them should not abort the download, hence errors='ignore'.
    df = df.drop(columns=['SarsCov2_nRVv3', 'SarsCov2_nRVv3_pct'], errors='ignore')
    df = df.rename(columns={'name': 'region'})
    df = df.fillna(0)
    df['date'] = pd.to_datetime(df['date'])
    return df
# Fetch pages 1..36 and combine them with a single concat.
# DataFrame.append was removed in pandas 2.0 and re-copied the whole
# accumulated frame on every iteration.
vaccination_pages = [get_vaccination_data(page) for page in tqdm(range(1, 37))]
# ignore_index gives the combined frame a unique 0..n-1 index instead of
# repeating each page's own 0..124 labels.
vaccination_ukraine_df = pd.concat(vaccination_pages, ignore_index=True)
100%|███████████████████████████████████████████| 35/35 [02:24<00:00, 4.14s/it]
vaccination_ukraine_df.head()
| date | region | Moderna | AstraZeneca | Moderna_pct | AstraZeneca_pct | Pfizer-BioNTech | Pfizer-BioNTech_pct | Sinovac (CoronaVac) | Sinovac (CoronaVac)_pct | Johnson & Johnson | Johnson & Johnson_pct | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2021-12-22 | Київська область | 154 | 215 | 2.81 | 3.92 | 2780 | 50.73 | 2331 | 42.54 | 0.0 | 0.0 |
| 1 | 2021-12-22 | Луганська область | 409 | 58 | 18.78 | 2.66 | 972 | 44.63 | 739 | 33.93 | 0.0 | 0.0 |
| 2 | 2021-12-22 | Волинська область | 359 | 115 | 10.49 | 3.36 | 2248 | 65.71 | 699 | 20.43 | 0.0 | 0.0 |
| 3 | 2021-12-22 | Миколаївська область | 251 | 25 | 10.07 | 1.00 | 1251 | 50.20 | 965 | 38.72 | 0.0 | 0.0 |
| 4 | 2021-12-22 | Черкаська область | 338 | 122 | 10.77 | 3.89 | 1350 | 43.03 | 1327 | 42.30 | 0.0 | 0.0 |
vaccination_ukraine_df.dtypes
date datetime64[ns] region object Moderna int64 AstraZeneca int64 Moderna_pct float64 AstraZeneca_pct float64 Pfizer-BioNTech int64 Pfizer-BioNTech_pct float64 Sinovac (CoronaVac) int64 Sinovac (CoronaVac)_pct float64 Johnson & Johnson float64 Johnson & Johnson_pct float64 dtype: object
vaccination_ukraine_df.describe()
| Moderna | AstraZeneca | Moderna_pct | AstraZeneca_pct | Pfizer-BioNTech | Pfizer-BioNTech_pct | Sinovac (CoronaVac) | Sinovac (CoronaVac)_pct | Johnson & Johnson | Johnson & Johnson_pct | |
|---|---|---|---|---|---|---|---|---|---|---|
| count | 4500.000000 | 4500.000000 | 4500.000000 | 4500.000000 | 4500.000000 | 4500.000000 | 4500.000000 | 4500.000000 | 4500.000000 | 4500.000000 |
| mean | 591.856889 | 624.592889 | 11.108153 | 11.851831 | 2527.220222 | 44.897076 | 1873.640222 | 31.929253 | 4.508889 | 0.169304 |
| std | 1307.180669 | 743.954089 | 16.031511 | 9.428439 | 2523.799513 | 15.915060 | 2028.583628 | 14.992706 | 83.140434 | 3.132487 |
| min | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 0.000000 | 150.000000 | 0.000000 | 4.887500 | 1020.000000 | 35.757500 | 547.750000 | 21.725000 | 0.000000 | 0.000000 |
| 50% | 26.500000 | 400.000000 | 0.675000 | 10.195000 | 1793.500000 | 45.510000 | 1241.000000 | 30.890000 | 0.000000 | 0.000000 |
| 75% | 705.000000 | 801.250000 | 19.335000 | 16.735000 | 3194.250000 | 55.042500 | 2495.500000 | 39.890000 | 0.000000 | 0.000000 |
| max | 15998.000000 | 6936.000000 | 98.300000 | 100.000000 | 21139.000000 | 100.000000 | 19105.000000 | 100.000000 | 2300.000000 | 80.910000 |
covid_stats_data_url = 'https://api-covid19.rnbo.gov.ua/data?to={}'


def get_covid_stats_data(date: datetime.date) -> pd.DataFrame:
    """Download the per-region COVID statistics snapshot for one day.

    Args:
        date: Day substituted (ISO formatted) into ``covid_stats_data_url``.

    Returns:
        The ``ukraine`` records of the response, flattened into a DataFrame,
        without the ``id``/``country`` columns, with ``label.uk``/``label.en``
        renamed to ``region``/``region_en``, missing values replaced by 0 and
        a datetime ``date`` column holding the requested day.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within 30 seconds.
    """
    # A timeout keeps one stalled request from hanging the whole download loop.
    response = requests.get(covid_stats_data_url.format(date.isoformat()), timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as data.
    response.raise_for_status()

    df = pd.json_normalize(response.json()['ukraine'])
    # The response rows carry no date of their own, so stamp the requested day.
    df['date'] = date.isoformat()
    df = df.drop(columns=['id', 'country'])
    df = df.rename(columns={'label.uk': 'region', 'label.en': 'region_en'})
    df = df.fillna(0)
    df['date'] = pd.to_datetime(df['date'])
    return df
today_date = datetime.date.today()
# Fetch today plus the 179 preceding days and combine them with a single
# concat. DataFrame.append was removed in pandas 2.0 and re-copied the whole
# accumulated frame on every iteration.
covid_stats_days = [
    get_covid_stats_data(today_date - datetime.timedelta(days=days_back))
    for days_back in tqdm(range(180))
]
# ignore_index gives the combined frame a unique 0..n-1 index instead of
# repeating each day's own row labels.
covid_stats_ukraine_df = pd.concat(covid_stats_days, ignore_index=True)
100%|█████████████████████████████████████████| 179/179 [01:29<00:00, 2.00it/s]
covid_stats_ukraine_df.head()
| confirmed | deaths | recovered | existing | suspicion | lat | lng | delta_confirmed | delta_deaths | delta_recovered | delta_existing | delta_suspicion | region_en | region | date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 111059 | 3029 | 99285 | 8745 | 59638 | 48.920517 | 28.685484 | 201 | 13 | 1046 | -858 | 0 | Vinnytsia | Вінницька область | 2021-12-23 |
| 1 | 102685 | 2148 | 97524 | 3013 | 49951 | 51.190885 | 24.870603 | 221 | 4 | 289 | -72 | 0 | Volynska | Волинська область | 2021-12-23 |
| 2 | 240883 | 8707 | 227545 | 4631 | 96446 | 48.301142 | 34.844169 | 231 | 21 | 163 | 47 | 0 | Dnipropetrovska | Дніпропетровська область | 2021-12-23 |
| 3 | 165359 | 5041 | 154439 | 5879 | 60058 | 48.047635 | 37.674086 | 531 | 13 | 789 | -271 | 0 | Donetska | Донецька область | 2021-12-23 |
| 4 | 139486 | 3204 | 133023 | 3259 | 72918 | 50.639546 | 28.475602 | 243 | 8 | 542 | -307 | 0 | Zhytomyrskа | Житомирська область | 2021-12-23 |
covid_stats_ukraine_df.dtypes
confirmed int64 deaths int64 recovered int64 existing int64 suspicion int64 lat float64 lng float64 delta_confirmed int64 delta_deaths int64 delta_recovered int64 delta_existing int64 delta_suspicion int64 region_en object region object date datetime64[ns] dtype: object
covid_stats_ukraine_df.describe()
| confirmed | deaths | recovered | existing | suspicion | lat | lng | delta_confirmed | delta_deaths | delta_recovered | delta_existing | delta_suspicion | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 4500.000000 | 4500.000000 | 4500.000000 | 4500.000000 | 4500.000000 | 4500.000000 | 4500.000000 | 4500.000000 | 4500.000000 | 4500.000000 | 4500.000000 | 4500.0 |
| mean | 106490.874222 | 2542.880889 | 97094.810222 | 6853.183111 | 73654.800000 | 49.172537 | 30.511702 | 310.410000 | 9.314444 | 273.655111 | 27.440444 | 0.0 |
| std | 53512.977875 | 1431.192910 | 47307.370362 | 8930.202204 | 33647.792136 | 1.360405 | 4.423417 | 394.763285 | 13.636438 | 455.353751 | 411.358887 | 0.0 |
| min | 20610.000000 | 837.000000 | 19096.000000 | 46.000000 | 17789.000000 | 46.518829 | 23.275854 | 0.000000 | 0.000000 | 0.000000 | -4949.000000 | 0.0 |
| 25% | 72995.250000 | 1511.750000 | 69254.750000 | 529.000000 | 57582.000000 | 48.301142 | 26.391115 | 28.000000 | 0.000000 | 16.000000 | -14.000000 | 0.0 |
| 50% | 91066.000000 | 2076.500000 | 86546.500000 | 2880.500000 | 72918.000000 | 49.260159 | 30.602712 | 148.000000 | 3.000000 | 70.000000 | 16.000000 | 0.0 |
| 75% | 135761.250000 | 3044.250000 | 125438.750000 | 10569.500000 | 86388.000000 | 50.299214 | 33.778584 | 452.250000 | 13.000000 | 333.250000 | 138.250000 | 0.0 |
| max | 325257.000000 | 8707.000000 | 305034.000000 | 51211.000000 | 182459.000000 | 51.355018 | 39.018116 | 2616.000000 | 144.000000 | 5309.000000 | 2028.000000 | 0.0 |
# Inner join: keep only the (region, date) pairs present in both the
# vaccination table and the COVID statistics table.
covid_and_vaccination_ukraine_df = pd.merge(
    vaccination_ukraine_df,
    covid_stats_ukraine_df,
    on=['region', 'date'],
)
covid_and_vaccination_ukraine_df
| date | region | Moderna | AstraZeneca | Moderna_pct | AstraZeneca_pct | Pfizer-BioNTech | Pfizer-BioNTech_pct | Sinovac (CoronaVac) | Sinovac (CoronaVac)_pct | ... | existing | suspicion | lat | lng | delta_confirmed | delta_deaths | delta_recovered | delta_existing | delta_suspicion | region_en | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2021-12-22 | Київська область | 154 | 215 | 2.81 | 3.92 | 2780 | 50.73 | 2331 | 42.54 | ... | 9930 | 101468 | 50.299214 | 30.455333 | 354 | 22 | 1309 | -977 | 0 | Kyivska |
| 1 | 2021-12-22 | Луганська область | 409 | 58 | 18.78 | 2.66 | 972 | 44.63 | 739 | 33.93 | ... | 1587 | 18754 | 48.985624 | 39.018116 | 77 | 6 | 279 | -208 | 0 | Luhanska |
| 2 | 2021-12-22 | Волинська область | 359 | 115 | 10.49 | 3.36 | 2248 | 65.71 | 699 | 20.43 | ... | 3085 | 49951 | 51.190885 | 24.870603 | 376 | 7 | 712 | -343 | 0 | Volynska |
| 3 | 2021-12-22 | Миколаївська область | 251 | 25 | 10.07 | 1.00 | 1251 | 50.20 | 965 | 38.72 | ... | 3666 | 54827 | 47.438413 | 31.778998 | 219 | 8 | 268 | -57 | 0 | Mykolaivska |
| 4 | 2021-12-22 | Черкаська область | 338 | 122 | 10.77 | 3.89 | 1350 | 43.03 | 1327 | 42.30 | ... | 7891 | 61376 | 49.260159 | 31.352536 | 309 | 16 | 925 | -632 | 0 | Cherkaska |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3570 | 2021-06-27 | Херсонська область | 0 | 30 | 0.00 | 8.90 | 0 | 0.00 | 307 | 91.10 | ... | 445 | 25112 | 46.673984 | 33.535980 | 15 | 0 | 3 | 12 | 0 | Khersonska |
| 3571 | 2021-06-27 | Закарпатська область | 0 | 50 | 0.00 | 7.54 | 0 | 0.00 | 613 | 92.46 | ... | 286 | 57582 | 48.403718 | 23.275854 | 3 | 0 | 21 | -18 | 0 | Zakarpatska |
| 3572 | 2021-06-27 | Рівненська область | 0 | 10 | 0.00 | 2.36 | 348 | 82.08 | 66 | 15.57 | ... | 294 | 75027 | 51.039594 | 26.391115 | 9 | 0 | 11 | -2 | 0 | Rivnenska |
| 3573 | 2021-06-27 | Миколаївська область | 0 | 30 | 0.00 | 8.85 | 108 | 31.86 | 201 | 59.29 | ... | 574 | 54827 | 47.438413 | 31.778998 | 22 | 2 | 13 | 7 | 0 | Mykolaivska |
| 3574 | 2021-06-27 | Сумська область | 0 | 0 | 0.00 | 0.00 | 30 | 4.53 | 632 | 95.47 | ... | 554 | 65456 | 51.106120 | 34.123810 | 15 | 0 | 25 | -10 | 0 | Sumska |
3575 rows × 25 columns
# Region outlines: keep only the region name and its geometry, and call the
# name column `region` like the other tables in this notebook.
ukraine = (
    gpd.read_file('data/ukraine.json')[['NAME_1', 'geometry']]
    .rename(columns={'NAME_1': 'region'})
)
ukraine.head()
| region | geometry | |
|---|---|---|
| 0 | Cherkasy | MULTIPOLYGON (((31.32614 48.74507, 31.31716 48... |
| 1 | Chernihiv | MULTIPOLYGON (((33.09283 50.50966, 33.09261 50... |
| 2 | Chernivtsi | MULTIPOLYGON (((24.93280 47.72794, 24.93301 47... |
| 3 | Crimea | MULTIPOLYGON (((33.79291 44.39153, 33.79465 44... |
| 4 | Dnipropetrovs'k | MULTIPOLYGON (((33.93176 47.48407, 33.92332 47... |
# Plain outline map of the regions: light gray fill, white borders, no frame
# around the view.
(
    alt.Chart(ukraine)
    .project()
    .mark_geoshape(fill='lightgray', stroke='white', strokeWidth=1)
    .properties(width=800, height=600)
    .encode()
    .configure_view(strokeWidth=0)
)
covid_stats_ukraine_df.region_en.unique()
array(['Vinnytsia', 'Volynska', 'Dnipropetrovska', 'Donetska',
'Zhytomyrskа', 'Zakarpatska', 'Zaporizka', 'Ivano-Frankivska',
'Kyivska', 'Kirovohradska', 'Luhanska', 'Lvivska', 'Mykolaivska',
'Kyiv', 'Odeska', 'Poltavska', 'Rivnenska', 'Sumska',
'Ternopilska', 'Kharkivska', 'Khersonska', 'Khmelnytska',
'Cherkaska', 'Chernivetska', 'Chernihivska'], dtype=object)
# Geofacet layout: one (name, row, col) cell per region on an 8-column grid.
# Names are spelled exactly as in covid_stats_ukraine_df.region_en so the
# cells can be matched to the data. They are copied verbatim from that column;
# do not retype them.
# Fix: the northern cell (row 1, col 6) is Chernihivska. It was previously
# labelled "Chernivetska", which duplicated the south-western cell (row 4,
# col 2) and left Chernihivska without a cell.
ukraine_geofaset_cells = [
    ("Volynska", 1, 2),
    ("Kyiv", 1, 5),
    ("Chernihivska", 1, 6),
    ("Sumska", 1, 7),
    ("Kyivska", 2, 5),
    ("Zhytomyrskа", 2, 4),
    ("Luhanska", 2, 8),
    ("Lvivska", 2, 1),
    ("Ternopilska", 2, 2),
    ("Rivnenska", 2, 3),
    ("Poltavska", 2, 6),
    ("Kharkivska", 2, 7),
    ("Zakarpatska", 3, 1),
    ("Donetska", 3, 8),
    ("Ivano-Frankivska", 3, 2),
    ("Khmelnytska", 3, 3),
    ("Vinnytsia", 3, 4),
    ("Cherkaska", 3, 5),
    ("Kirovohradska", 3, 6),
    ("Dnipropetrovska", 3, 7),
    ("Mykolaivska", 4, 5),
    ("Khersonska", 4, 6),
    ("Chernivetska", 4, 2),
    ("Odeska", 4, 4),
    ("Zaporizka", 4, 7),
    ("Crimea", 5, 5),
]
ukraine_geofaset_grid = pd.DataFrame(ukraine_geofaset_cells, columns=['name', 'row', 'col'])
ukraine_geofaset_grid
| name | row | col | |
|---|---|---|---|
| 0 | Volynska | 1 | 2 |
| 1 | Kyiv | 1 | 5 |
| 2 | Chernivetska | 1 | 6 |
| 3 | Sumska | 1 | 7 |
| 4 | Kyivska | 2 | 5 |
| 5 | Zhytomyrskа | 2 | 4 |
| 6 | Luhanska | 2 | 8 |
| 7 | Lvivska | 2 | 1 |
| 8 | Ternopilska | 2 | 2 |
| 9 | Rivnenska | 2 | 3 |
| 10 | Poltavska | 2 | 6 |
| 11 | Kharkivska | 2 | 7 |
| 12 | Zakarpatska | 3 | 1 |
| 13 | Donetska | 3 | 8 |
| 14 | Ivano-Frankivska | 3 | 2 |
| 15 | Khmelnytska | 3 | 3 |
| 16 | Vinnytsia | 3 | 4 |
| 17 | Cherkaska | 3 | 5 |
| 18 | Kirovohradska | 3 | 6 |
| 19 | Dnipropetrovska | 3 | 7 |
| 20 | Mykolaivska | 4 | 5 |
| 21 | Khersonska | 4 | 6 |
| 22 | Chernivetska | 4 | 2 |
| 23 | Odeska | 4 | 4 |
| 24 | Zaporizka | 4 | 7 |
| 25 | Crimea | 5 | 5 |
# Preview of the geofacet layout: one gray square per grid cell, positioned by
# its ordinal column/row, with all axis decoration and the view frame hidden.
ukraine_geofaset_base = (
    alt.Chart(ukraine_geofaset_grid)
    .project()
    .mark_square(fill='lightgray', size=8000)
    .properties(width=800, height=600)
    .encode(
        x=alt.X('col:O'),
        y=alt.Y('row:O'),
    )
    .configure_axis(grid=False, domain=False, labels=False, ticks=False, title=None)
    .configure_view(strokeWidth=0)
)
ukraine_geofaset_base
Now we are ready to start analyzing the data and creating visualizations. Before that, however, we first need to set out the basic questions that we want to answer in this work.
1. See the general statistics of the COVID spread in Ukraine (for the last 6 months).
2. Discover the vaccination trends in Ukraine (for the last 6 months).
# Take the rows of the most recent day, total each status over all regions,
# and reshape the totals into a two-column (status, value) table.
status_columns = ['recovered', 'existing', 'suspicion', 'deaths']
is_latest_day = covid_stats_ukraine_df.date == covid_stats_ukraine_df.date.max()
status_totals = covid_stats_ukraine_df.loc[is_latest_day, status_columns].sum()
newest_date_covid_stats_ukraine_df = (
    status_totals.rename_axis('status').reset_index(name='value')
)
# Single stacked horizontal bar: one coloured segment per status, with the
# segment length equal to the number of people in that status.
(
    alt.Chart(newest_date_covid_stats_ukraine_df)
    .mark_bar(size=60)
    .encode(
        x=alt.X('value:Q', title='Number of people'),
        color=alt.Color('status:N', title='Status'),
        tooltip=[
            alt.Tooltip('status:N', title='Status'),
            alt.Tooltip('value:Q', title='Number of people'),
        ],
    )
    .properties(width=800, height=70)
)
This visualization shows the latest (daily updated) distribution of COVID cases by status.
An alternative approach to visualizing this data would be to use a standard bar plot or a pie plot (the latter did not work for some reason); either of these solutions would work well in this situation.
# Stacked area chart of the four status columns per calendar month.
# transform_fold turns the listed columns into key/value pairs, so `key` is
# the status name and `value` its count.
# NOTE(review): the y value is the sum over every daily row (all days and all
# regions) in the month; if these columns are running totals, the magnitude
# scales with the number of days in the data. Confirm this is intended.
(
    alt.Chart(covid_stats_ukraine_df)
    .transform_fold(['recovered', 'existing', 'suspicion', 'deaths'])
    .mark_area()
    .encode(
        x=alt.X('month(date):O', title='Month'),
        y=alt.Y('value:Q', aggregate='sum', title='Covid patients status propostion'),
        color=alt.Color('key:N', title='Patient Status'),
        order=alt.Order('value:Q', aggregate='sum', sort='ascending'),
        tooltip=[
            alt.Tooltip('key:N', title='Status'),
            alt.Tooltip('value:Q', aggregate='sum', title='Number of people'),
            alt.Tooltip('month(date):O', title='Month'),
        ],
    )
    .properties(width=800, height=500)
)
This visualization was made to compare the dynamics of registered COVID cases over the last 6 months and to show the new COVID wave that happened this autumn.
An alternative approach was to use a normalized bar plot to simplify the chart a bit, but this area plot highlights the effect of the new COVID wave better.
# Confirmed cases per region as of each month.
# `confirmed` appears to be a running total (the feed carries a separate
# `delta_confirmed` column for daily increments). Summing it over every day of
# a month therefore multiplies the total by the number of days and produces
# values above a region's population. The monthly figure is instead the
# highest running total seen in that month.
monthly_confirmed = (
    covid_and_vaccination_ukraine_df
    .groupby([pd.Grouper(key='date', freq='1M'), 'region'])[['confirmed']]
    .max()
    .reset_index()
)
# Attach the region coordinates. The grouped dates are month-end dates, so this
# inner join keeps only months whose last day is present in the data (a
# partial final month is dropped, as before).
region_coordinates = covid_and_vaccination_ukraine_df[['date', 'region', 'lat', 'lng']]
confirmed_case_by_month_and_region = monthly_confirmed.merge(
    region_coordinates, on=['region', 'date']
)
confirmed_case_by_month_and_region = confirmed_case_by_month_and_region[
    ['date', 'region', 'lat', 'lng', 'confirmed']
]
confirmed_case_by_month_and_region
| date | region | lat | lng | confirmed | |
|---|---|---|---|---|---|
| 0 | 2021-06-30 | Івано-Франківська область | 48.701932 | 24.619344 | 346619 |
| 1 | 2021-06-30 | Волинська область | 51.190885 | 24.870603 | 248528 |
| 2 | 2021-06-30 | Вінницька область | 48.920517 | 28.685484 | 284306 |
| 3 | 2021-06-30 | Дніпропетровська область | 48.301142 | 34.844169 | 542223 |
| 4 | 2021-06-30 | Донецька область | 48.047635 | 37.674086 | 362328 |
| ... | ... | ... | ... | ... | ... |
| 145 | 2021-11-30 | Хмельницька область | 49.507456 | 26.929458 | 2654143 |
| 146 | 2021-11-30 | Черкаська область | 49.260159 | 31.352536 | 2267734 |
| 147 | 2021-11-30 | Чернівецька область | 48.268275 | 25.976996 | 2280147 |
| 148 | 2021-11-30 | Чернігівська область | 51.355018 | 32.004509 | 1707910 |
| 149 | 2021-11-30 | м. Київ | 50.448668 | 30.547911 | 5741305 |
150 rows × 5 columns
# Base layer for the bubble map: sky-blue region shapes with white borders.
background = (
    alt.Chart(ukraine)
    .project()
    .mark_geoshape(fill='skyblue', stroke='white', strokeWidth=1)
    .properties(width=800, height=600)
    .encode()
)
def timestamp(t):
    """Return *t* as milliseconds since the Unix epoch.

    *t* is anything ``pd.to_datetime`` turns into a single timestamp.
    """
    moment = pd.to_datetime(t)
    epoch_seconds = moment.timestamp()
    return epoch_seconds * 1000
# Range slider over the months in the data. Values are epoch milliseconds and
# each step is 30 days.
first_month_ms = timestamp(confirmed_case_by_month_and_region.date.min())
last_month_ms = timestamp(confirmed_case_by_month_and_region.date.max())
thirty_days_ms = datetime.timedelta(days=30) // datetime.timedelta(milliseconds=1)

slider = alt.binding_range(
    name='Select Month:',
    min=first_month_ms,
    max=last_month_ms,
    step=thirty_days_ms,
)
# Single-value selection on `date`, driven by the slider and starting at the
# earliest month.
select_date = alt.selection_single(
    name='slider',
    fields=['date'],
    bind=slider,
    init={'date': first_month_ms},
)
points_base = alt.Chart(confirmed_case_by_month_and_region).encode(
longitude=alt.Longitude('lng:Q'),
latitude=alt.Latitude('lat:Q')
)
points = points_base.mark_circle().encode(
size=alt.Size(
'confirmed:Q',
title='Number of confirmed cases',
scale=alt.Scale(domain=[
confirmed_case_by_month_and_region.confirmed.min(), confirmed_case_by_month_and_region.confirmed.max()
], range=[300, 2500])
),
color=alt.Color('confirmed:Q', scale=alt.Scale(scheme='orangered', domain=[
confirmed_case_by_month_and_region.confirmed.min(), confirmed_case_by_month_and_region.confirmed.max()
])),
tooltip=[
alt.Tooltip('region:N', title='Region'),
alt.Tooltip('confirmed:Q', title='Number of confirmed cases')
],
).transform_filter(
"(year(datum.date) == year(slider.date[0])) && "
"(month(datum.date) == month(slider.date[0]))"
).add_selection(select_date)
# Region-name labels at the same coordinates as the bubbles.
# NOTE: text_labels is defined here but is not part of the layered chart below.
text_labels = points_base.mark_text().encode(text=alt.Text('region:N'))

# Bubble map: region shapes underneath, confirmed-case bubbles on top.
alt.layer(background, points).configure_view(strokeWidth=0)
This visualization is designed to show the growth of confirmed COVID cases and how quickly they increased throughout the last 6 months.
However, the main disadvantage of this approach is that the confirmed cases are not normalized by region population, and because of that the bigger and more populated regions get more attention on the map.
The second problem with this visualization is the poor UI of the standard HTML range input used by Altair: the preview of the selected value is unhelpful (the only way that worked for me was to use timestamps), and the range lacks discrete stops for the individual months, which creates a bad UX. An alternative approach here would be to use radio buttons, but that is not quite the experience I would like to achieve.
# Per-region panel template: normalized stacked bars of the four status
# columns per month. transform_fold turns the listed columns into key/value
# pairs; the panel is restricted to one region later via transform_filter.
region_base = (
    alt.Chart(covid_stats_ukraine_df)
    .mark_bar()
    .transform_fold(['recovered', 'existing', 'suspicion', 'deaths'])
    .encode(
        x=alt.X('month(date):O', title='Month'),
        y=alt.Y('value:Q', aggregate='max', stack='normalize', title=None),
        color=alt.Color('key:N', title='Status'),
        order=alt.Order('value:Q', aggregate='sum', sort='ascending'),
        tooltip=[
            alt.Tooltip('key:N', title='Status'),
            alt.Tooltip('value:Q', aggregate='max', title='Number of people'),
            alt.Tooltip('month(date):O', title='Month'),
        ],
        detail=alt.Detail('region_en:N'),
    )
    .properties(width=120, height=120)
)

# Region name as a text mark shifted 70px up, used as the panel title.
region_base_title = (
    alt.Chart(covid_stats_ukraine_df)
    .mark_text(dy=-70)
    .encode(text=alt.Text('region_en:N'))
)

# Layer the title over the bars, then preview the template for one region.
region_base = alt.layer(region_base, region_base_title)
region_base.transform_filter(alt.datum.region_en == 'Vinnytsia')
# Invisible placeholder panel with the same size as a region panel, used to
# fill grid cells that hold no region. Two fully transparent points span the
# date range and the 0..1 value range; every axis element is hidden.
placeholder_points = [
    {'x': covid_stats_ukraine_df.date.min(), 'y': 1},
    {'x': covid_stats_ukraine_df.date.max(), 'y': 0},
]
empty_cell = (
    alt.Chart(alt.Data(values=placeholder_points))
    .mark_point(opacity=0)
    .encode(
        x=alt.X('x:T', axis=alt.Axis(title=None, labelOpacity=0, tickOpacity=0, domainOpacity=0, grid=False)),
        y=alt.Y('y:Q', axis=alt.Axis(title=None, labelOpacity=0, tickOpacity=0, domainOpacity=0, grid=False)),
    )
    .properties(width=120, height=120)
)
empty_cell
# Assemble the geofacet: one horizontal row of 8 panels per grid row, stacked
# vertically. A cell shows the region's panel when the grid names a region
# that exists in covid_stats_ukraine_df, otherwise the invisible placeholder.
known_regions = set(covid_stats_ukraine_df.region_en.unique())

# (row, col) -> region name; the first grid entry wins if a cell repeats.
grid_names = {}
for cell_name, cell_row, cell_col in zip(
    ukraine_geofaset_grid['name'], ukraine_geofaset_grid['row'], ukraine_geofaset_grid['col']
):
    grid_names.setdefault((int(cell_row), int(cell_col)), cell_name)

chart = alt.vconcat()
for row in ukraine_geofaset_grid.row.unique():
    chart_row = alt.hconcat()
    for col in range(1, 9):
        region_en = grid_names.get((int(row), col))
        if region_en in known_regions:
            chart_row |= region_base.transform_filter(alt.datum.region_en == region_en)
        else:
            chart_row |= empty_cell
    chart &= chart_row

(
    chart
    .properties(title='Covid cases status proportions')
    .configure_title(fontSize=30, anchor='middle', dy=-20)
    .configure_view(strokeWidth=0)
)
This visualization is similar to the one used in section 1.1, with the difference that now all plots are normalized. This is done to remove the differences in region population while still showing that, for all regions, an increase in new COVID cases can be seen in the autumn months.
Alternatively, if we decide to show only the trend in existing confirmed COVID cases, we can use a standard line plot of the existing cases only; but again, due to the lack of city population data, the visualization would be unfair to the smaller cities.
vaccination_ukraine_df.head()
| date | region | Moderna | AstraZeneca | Moderna_pct | AstraZeneca_pct | Pfizer-BioNTech | Pfizer-BioNTech_pct | Sinovac (CoronaVac) | Sinovac (CoronaVac)_pct | Johnson & Johnson | Johnson & Johnson_pct | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2021-12-22 | Київська область | 154 | 215 | 2.81 | 3.92 | 2780 | 50.73 | 2331 | 42.54 | 0.0 | 0.0 |
| 1 | 2021-12-22 | Луганська область | 409 | 58 | 18.78 | 2.66 | 972 | 44.63 | 739 | 33.93 | 0.0 | 0.0 |
| 2 | 2021-12-22 | Волинська область | 359 | 115 | 10.49 | 3.36 | 2248 | 65.71 | 699 | 20.43 | 0.0 | 0.0 |
| 3 | 2021-12-22 | Миколаївська область | 251 | 25 | 10.07 | 1.00 | 1251 | 50.20 | 965 | 38.72 | 0.0 | 0.0 |
| 4 | 2021-12-22 | Черкаська область | 338 | 122 | 10.77 | 3.89 | 1350 | 43.03 | 1327 | 42.30 | 0.0 | 0.0 |
# Per-region panel template: stacked areas of doses per vaccine and month.
# transform_fold turns the vaccine columns into key/value pairs; the panel is
# restricted to one region later via transform_filter.
region_base = (
    alt.Chart(covid_and_vaccination_ukraine_df)
    .mark_area()
    .transform_fold(
        ['Moderna', 'AstraZeneca', 'Pfizer-BioNTech', 'Sinovac (CoronaVac)', 'Johnson & Johnson'],
    )
    .encode(
        x=alt.X('month(date):O', title='Month'),
        y=alt.Y('value:Q', aggregate='sum', title=None),
        color=alt.Color('key:N', title='Vaccination manufacturer'),
        tooltip=[
            alt.Tooltip('key:N', title='Vaccination manufacturer'),
            alt.Tooltip('value:Q', aggregate='sum', title='Number of people'),
            alt.Tooltip('month(date):O', title='Month'),
        ],
        detail=alt.Detail('region_en:N'),
    )
    .properties(width=120, height=120)
)

# Region name as a text mark shifted 70px up, used as the panel title.
region_base_title = (
    alt.Chart(covid_and_vaccination_ukraine_df)
    .mark_text(dy=-70)
    .encode(text=alt.Text('region_en:N'))
)

# Layer the title over the areas, then preview the template for one region.
region_base = alt.layer(region_base, region_base_title)
region_base.transform_filter(alt.datum.region_en == 'Vinnytsia')
# Invisible placeholder panel with the same size as a region panel, used to
# fill grid cells that hold no region. Two fully transparent points span the
# date range and the 0..100_000 value range; every axis element is hidden.
placeholder_points = [
    {'x': covid_and_vaccination_ukraine_df.date.min(), 'y': 100_000},
    {'x': covid_and_vaccination_ukraine_df.date.max(), 'y': 0},
]
empty_cell = (
    alt.Chart(alt.Data(values=placeholder_points))
    .mark_point(opacity=0)
    .encode(
        x=alt.X('x:T', axis=alt.Axis(title=None, labelOpacity=0, tickOpacity=0, domainOpacity=0, grid=False)),
        y=alt.Y('y:Q', axis=alt.Axis(title=None, labelOpacity=0, tickOpacity=0, domainOpacity=0, grid=False)),
    )
    .properties(width=120, height=120)
)
empty_cell
# Assemble the geofacet: one horizontal row of 8 panels per grid row, stacked
# vertically. A cell shows the region's panel when the grid names a region
# that has data, otherwise the invisible placeholder.

# Membership is checked against the frame that `region_base` actually plots
# (the merged covid + vaccination table), not the covid-only table, so a
# region without merged rows cannot end up as an empty titled panel.
plotted_regions = set(covid_and_vaccination_ukraine_df.region_en.unique())

# (row, col) -> region name, built once instead of re-filtering the grid for
# every cell; the first grid entry wins if a cell repeats.
grid_names = {}
for cell_name, cell_row, cell_col in zip(
    ukraine_geofaset_grid['name'], ukraine_geofaset_grid['row'], ukraine_geofaset_grid['col']
):
    grid_names.setdefault((int(cell_row), int(cell_col)), cell_name)

chart = alt.vconcat()
for row in ukraine_geofaset_grid.row.unique():
    chart_row = alt.hconcat()
    for col in range(1, 9):
        # None (no region in this cell) is never in plotted_regions.
        region_en = grid_names.get((int(row), col))
        if region_en in plotted_regions:
            chart_row |= region_base.transform_filter(alt.datum.region_en == region_en)
        else:
            chart_row |= empty_cell
    chart &= chart_row

chart.properties(
    title='Vaccination proportions'
).configure_title(
    fontSize=30,
    anchor='middle',
    dy=-20
).configure_view(
    strokeWidth=0
)
This visualization tries to show the rise in vaccination in the autumn (most likely due to the new quarantine restrictions and the increase in new COVID cases shown in the previous sections). It also reveals a somewhat unexpected outlier region, Zakarpatska, which is the only region where the 'Johnson & Johnson' vaccine was used.